* Calculate industry level tariffs from TRAINS - applied MFN rates

* Download data: http://wits.worldbank.org/WITS/WITS/AdvanceQuery/TRAINSBulkExport/TRAINSBulkExportQueryDefination.aspx?Page=TRAINSBulkExport
* TRAINS -> Advanced query -> Bulk download (TRAINS) -> MFN applied rates (including AVE)
* Unzip all files: unzip "*.zip"

global tmp "/tmp"

* Country codes
use /users/andreas/dropbox/data/comtrade/country_codes, clear
keep iso2 iso3 isonum start end
drop if trim(iso3)==""
drop if trim(iso2)==""
drop start end
duplicates examples isonum
save $tmp/tmp, replace

* ---------------------------
* Import tariff data
* ---------------------------

cd /users/andreas/downloads/MFN
global tmp "/tmp"

set more off
local files : dir "`c(pwd)'" files "*.CSV"

* Save in Stata format
foreach file in `files' {
	insheet using `file', comma clear
	import delimited using `file', clear
    save `file'.dta, replace
}

* Append them
clear
local files : dir "`c(pwd)'" files "*.dta"
foreach file in `files' {
	append using `file'	
}
save /users/andreas/dropbox/work/patstat/data/TRAINS/allMFN.dta, replace


* ---------------------------
* Preliminaries: HS Combined to ISICr3 concordance
* ---------------------------
cd /users/andreas/dropbox/work/patstat/data
import delim correspondence/HS_Combined_to_ISICr3/HS_to_I3, clear
ren hscombinedproductcode hsc
ren isicrevision3productcode isicr3
keep hsc isicr3 
save correspondence/HS_Combined_to_ISICr3/HS_to_I3, replace

* ---------------------------
* Preliminaries : ISIC4 -> NACE2
* ---------------------------
cd /users/andreas/dropbox/work/patstat/data
use "Correspondence/ISIC4_NACE2/ISIC4_NACE2", clear
destring isic4code, replace ignore("A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "P" "Q" "R" "S" "T" "U" )
destring nace2code, replace ignore("A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "P" "Q" "R" "S" "T" "U" )
drop if missing( isic4code)
drop if missing( nace2code)
keep isic4code nace2code
save $tmp/isic4_nace2, replace

* ---------------------------
* Convert HS to HS combined
* ---------------------------
cd /users/andreas/dropbox/work/patstat/data
use TRAINS/allMFN, clear

* We have 30% duplicates in the raw data - drop them
duplicates examples reporter_iso productcode year
duplicates drop reporter_iso productcode year, force  

gen hs92 = productcode if year<1996
merge m:1 hs92 using correspondence/HS_Combined_to_HS/Concordance_HS_to_H0
drop _merge
gen hs96 = productcode if year>=1996 & year<2002
merge m:1 hs96 using correspondence/HS_Combined_to_HS/Concordance_HS_to_H1, update
drop _merge
gen hs02 = productcode if year>=2002 & year<2007
merge m:1 hs02 using correspondence/HS_Combined_to_HS/Concordance_HS_to_H2, update
drop _merge
gen hs07 = productcode if year>=2007 & year<2012
merge m:1 hs07 using correspondence/HS_Combined_to_HS/Concordance_HS_to_H3, update
drop _merge
gen hs12 = productcode if year>=2012
merge m:1 hs12 using correspondence/HS_Combined_to_HS/Concordance_HS_to_H4, update
drop _merge // Note: HSC-HS12 match is not good

* Get ISO2 country names
ren reporter_iso isonum
merge m:1 isonum using $tmp/tmp, keep(match master)
tab _merge
replace iso2="TW" if isonum==158
replace iso2="NO" if isonum==578
replace iso2="CH" if isonum==756
replace iso2="US" if isonum==840
replace iso2="EU" if isonum==918
drop if iso2==""
drop _merge
drop hs92 hs96 hs02 hs07 hs12 nomencode isonum productcode
drop if hsc==.

save $tmp/allMFN, replace


* ------------------------------
* Add EU countries
* ------------------------------
set more off
use $tmp/allMFN, clear
keep if iso2=="EU"
gen eu=1
save $tmp/tmpEU, replace

* EU 1986 countries
foreach cty in BE FR DE IT LU NL DK IE GB GR ES PT {
  use $tmp/tmpEU, clear
  replace iso2="`cty'"
  save $tmp/tmp`cty', replace
}

* EU 1995 countries
foreach cty in AT FI SE {
  use $tmp/tmpEU, clear
  keep if year>=1995
  replace iso2="`cty'"
  save $tmp/tmp`cty', replace
}

* EU 2004 countries
foreach cty in CZ EE CY LV LT HU MT PL SK SI {
  use $tmp/tmpEU, clear
  keep if year>=2004
  replace iso2="`cty'"
  save $tmp/tmp`cty', replace
}

* EU 2007 countries
foreach cty in RO BG {
  use $tmp/tmpEU, clear
  keep if year>=2007
  replace iso2="`cty'"
  save $tmp/tmp`cty', replace
}

* Add them back
use $tmp/allMFN, clear
drop if iso2=="EU"
foreach cty in BE FR DE IT LU NL DK IE GB GR ES PT AT FI SE CZ EE CY LV LT HU MT PL SK SI RO BG {
  append using $tmp/tmp`cty'
}
duplicates examples iso2 hsc year  // No duplicates
save $tmp/allMFN2, replace

* ------------------------------
* Interpolate between years
* ------------------------------
use $tmp/allMFN2, clear
keep iso2 hsc year simple 
egen id = group(iso2 hsc)
tsset id year
tsfill, full  
bys id: ipolate simple year, gen(tariff)

* Add back HS and ISO code
egen tmp1=mode(hsc), by(id)
egen tmp2=mode(iso2), by(id)
drop hsc iso2
ren tmp1 hsc 
ren tmp2 iso2
keep iso2 hsc year simple tariff
order iso2 hsc year
sort iso2 hsc year

* Extrapolate +- 3 years
gen tmp1 = year if tariff~=.
egen miny = min(tmp1), by(iso2 hsc)
egen maxy = max(tmp1), by(iso2 hsc)
gen tmp2 = tariff if year==miny
gen tmp3 = tariff if year==maxy
egen firsttau = max(tmp2), by(iso2 hsc)
egen lasttau = max(tmp3), by(iso2 hsc)
gen lag1 = miny-year
gen lag2 = year-maxy
replace tariff=firsttau if miny<=1995 & (lag1<=3 & lag1>0) & tariff==.
replace tariff=lasttau if maxy>=1999 & (lag2<=3 & lag2>0)  & tariff==.
keep iso2 hsc year simple tariff 
save TRAINS/allMFN_HScombined_interpl, replace


* ------------------------------
* Balance panel and aggregate to ISICr3 -> NACEr2
* ------------------------------

use TRAINS/allMFN_HScombined_interpl, clear
merge m:1 hsc using correspondence/HS_Combined_to_ISICr3/HS_to_I3, keep(match master)
* We match 93% of the data
keep if _merge==3
keep if year>=1992 & year<=2009
drop if tariff==.
bys iso2 hsc: gen nyear=[_N]
keep if nyear==18
sort iso2 hsc year
collapse (mean) tariff (median) mtau=tariff (count) hsc, by(year iso2 isicr3)

* Merge with ISIC rev3.1
ren isicr3 isic3code
joinby isic3code using Correspondence/ISIC3_to_ISIC3.1/ISIC30_ISIC31.dta, unmatched(master)
collapse (mean) tariff mtau (count) isic3code, by(iso2 year isic31code)  // If there are many r3.1 codes for a r3 code, we take the simple average

* Merge with ISIC rev4
joinby isic31code using Correspondence/ISIC4_ISIC3.1/ISIC4_ISIC31.dta, unmatched(master)
collapse (mean) tariff mtau (count) isic31code, by(iso2 year isic4code)

* Merge with NACE rev2
joinby isic4code using $tmp/isic4_nace2, unmatched(master)
collapse (mean) tariff mtau (count) isic4code, by(iso2 year nace2code) 
drop if nace2code==.
save TRAINS/allMFN_NACEr2_interpl, replace

